{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## This is the Sentiment Analysis for Amazon Reviews of the products listed on Amazon. The reviews are entered by actual customers. The review has attributes like stars, the actual text entered, we are generating Sentiment Polarity Score basis the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### Loading all the required libraries here\n",
    "from lxml import html  \n",
    "import requests\n",
    "import pandas as pd\n",
    "from nltk.corpus import stopwords\n",
    "from textblob import TextBlob\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import sklearn\n",
    "import scikitplot as skplt\n",
    "import nltk\n",
    "#to ignore warnings\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Download all the stopwords package here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to\n",
      "[nltk_data]     /Users/vaibhavverdhan/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to\n",
      "[nltk_data]     /Users/vaibhavverdhan/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download('punkt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package wordnet to\n",
      "[nltk_data]     /Users/vaibhavverdhan/nltk_data...\n",
      "[nltk_data]   Package wordnet is already up-to-date!\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nltk.download('wordnet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "xpath_reviews = '//div[@data-hook=\"review\"]'\n",
    "reviews = parser.xpath(xpath_reviews)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "xpath_rating  = './/i[@data-hook=\"review-star-rating\"]//text()' \n",
    "xpath_title   = './/a[@data-hook=\"review-title\"]//text()'\n",
    "xpath_author  = './/a[@data-hook=\"review-author\"]//text()'\n",
    "xpath_date    = './/span[@data-hook=\"review-date\"]//text()'\n",
    "xpath_body    = './/span[@data-hook=\"review-body\"]//text()'\n",
    "xpath_helpful = './/span[@data-hook=\"helpful-vote-statement\"]//text()'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a dataframe here. \n",
    "\n",
    "reviews_df = pd.DataFrame()\n",
    "for review in reviews:\n",
    "    rating  = review.xpath(xpath_rating)\n",
    "    title   = review.xpath(xpath_title)\n",
    "    author  = review.xpath(xpath_author)\n",
    "    date    = review.xpath(xpath_date)\n",
    "    body    = review.xpath(xpath_body)\n",
    "    helpful = review.xpath(xpath_helpful)\n",
    "\n",
    "    review_dict = {'rating': rating,\n",
    "                   'title': title,\n",
    "                   'author': author,             \n",
    "                   'date': date,\n",
    "                   'body': body,\n",
    "                   'helpful': helpful}\n",
    "    reviews_df = reviews_df.append(review_dict, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_reviews = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill the values of the reviews here. . \n",
    "\n",
    "for i in range(1,90):\n",
    "    amazon_url = 'https://www.amazon.co.uk/Hive-Heating-Thermostat-Professional-Installation/product-reviews/B011B3J6KY/ref=cm_cr_othr_d_show_all?ie=UTF8&reviewerType=all_reviews&pageNumber='+str(i)\n",
    "    headers = {'User-Agent': user_agent}\n",
    "    page = requests.get(amazon_url, headers = headers)\n",
    "    parser = html.fromstring(page.content)\n",
    "    xpath_reviews = '//div[@data-hook=\"review\"]'\n",
    "    reviews = parser.xpath(xpath_reviews)\n",
    "    reviews_df = pd.DataFrame()\n",
    "    xpath_rating  = './/i[@data-hook=\"review-star-rating\"]//text()' \n",
    "    xpath_title   = './/a[@data-hook=\"review-title\"]//text()'\n",
    "    xpath_author  = './/a[@data-hook=\"review-author\"]//text()'\n",
    "    xpath_date    = './/span[@data-hook=\"review-date\"]//text()'\n",
    "    xpath_body    = './/span[@data-hook=\"review-body\"]//text()'\n",
    "    xpath_helpful = './/span[@data-hook=\"helpful-vote-statement\"]//text()'\n",
    "    #print(i)\n",
    "    for review in reviews:\n",
    "        rating  = review.xpath(xpath_rating)\n",
    "        title   = review.xpath(xpath_title)\n",
    "        author  = review.xpath(xpath_author)\n",
    "        date    = review.xpath(xpath_date)\n",
    "        body    = review.xpath(xpath_body)\n",
    "        helpful = review.xpath(xpath_helpful)\n",
    "\n",
    "        review_dict = {'rating': rating,\n",
    "                       'title': title,\n",
    "                       'author': author,             \n",
    "                       'date': date,\n",
    "                       'body': body,\n",
    "                       'helpful': helpful}\n",
    "        reviews_df = reviews_df.append(review_dict, ignore_index=True)\n",
    "    #print(reviews_df)\n",
    "    all_reviews = all_reviews.append(reviews_df)\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  So I al...</td>\n",
       "      <td>[Reviewed in the United Kingdom on 3 September...</td>\n",
       "      <td>[2 people found this helpful]</td>\n",
       "      <td>[5.0 out of 5 stars]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Great for those...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I can’t...</td>\n",
       "      <td>[Reviewed in the United Kingdom on 7 December ...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[4.0 out of 5 stars]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Great system, \\...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I’ve ne...</td>\n",
       "      <td>[Reviewed in the United Kingdom on 10 January ...</td>\n",
       "      <td>[One person found this helpful]</td>\n",
       "      <td>[5.0 out of 5 stars]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Easy to fit if ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I have ...</td>\n",
       "      <td>[Reviewed in the United Kingdom on 2 December ...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[5.0 out of 5 stars]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Installation is...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  Brought...</td>\n",
       "      <td>[Reviewed in the United Kingdom on 13 August 2...</td>\n",
       "      <td>[6 people found this helpful]</td>\n",
       "      <td>[4.0 out of 5 stars]</td>\n",
       "      <td>[\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Looks the part,...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  author                                               body  \\\n",
       "0     []  [\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  So I al...   \n",
       "1     []  [\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I can’t...   \n",
       "2     []  [\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I’ve ne...   \n",
       "3     []  [\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  I have ...   \n",
       "4     []  [\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , \\n  Brought...   \n",
       "\n",
       "                                                date  \\\n",
       "0  [Reviewed in the United Kingdom on 3 September...   \n",
       "1  [Reviewed in the United Kingdom on 7 December ...   \n",
       "2  [Reviewed in the United Kingdom on 10 January ...   \n",
       "3  [Reviewed in the United Kingdom on 2 December ...   \n",
       "4  [Reviewed in the United Kingdom on 13 August 2...   \n",
       "\n",
       "                           helpful                rating  \\\n",
       "0    [2 people found this helpful]  [5.0 out of 5 stars]   \n",
       "1                               []  [4.0 out of 5 stars]   \n",
       "2  [One person found this helpful]  [5.0 out of 5 stars]   \n",
       "3                               []  [5.0 out of 5 stars]   \n",
       "4    [6 people found this helpful]  [4.0 out of 5 stars]   \n",
       "\n",
       "                                               title  \n",
       "0  [\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Great for those...  \n",
       "1  [\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Great system, \\...  \n",
       "2  [\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Easy to fit if ...  \n",
       "3  [\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Installation is...  \n",
       "4  [\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    , Looks the part,...  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_reviews.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Define a path here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "out_folder = '/Users/vaibhavverdhan/Book/UnsupervisedLearningBookFinal/'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save the reviews locally"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_reviews.to_csv(out_folder + 'Reviews.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load the data and start the analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Load the data now and analyse it\n",
    "data_path = '/Users/vaibhavverdhan/Book/UnsupervisedLearningBookFinal/'\n",
    "reviewDataCSV = 'Reviews.csv'\n",
    "reviewData = (pd.read_csv(data_path+reviewDataCSV,index_col=0,))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(167, 6)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([\"['5.0 out of 5 stars']\", \"['4.0 out of 5 stars']\",\n",
       "       \"['2.0 out of 5 stars']\", \"['1.0 out of 5 stars']\",\n",
       "       \"['3.0 out of 5 stars']\"], dtype=object)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.rating.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['5.0 out of 5 stars']    123\n",
       "['1.0 out of 5 stars']     24\n",
       "['4.0 out of 5 stars']     12\n",
       "['2.0 out of 5 stars']      6\n",
       "['3.0 out of 5 stars']      2\n",
       "Name: rating, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.rating.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "24"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.rating.value_counts()[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"['5.0 out of 5 stars']\""
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.rating.unique()[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAV0AAADrCAYAAADKbEVrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAA3M0lEQVR4nO3dd5xU1f3/8de5M7O90Ys0ERlFQKQoCiqiolEsGLspatTEb4rpv8QkApqIJhqNxBasMfZoLChKB5VeBYGhLm0L2+v0Ob8/7iwssLtsmZk7s/t5Ph77AGfv3Pks7r73zrnnfI7SWiOEECI2DKsLEEKIjkRCVwghYkhCVwghYkhCVwghYkhCVwghYkhCVwghYkhCVwghYkhCVwghYkhCV4gIUUq9pJQ6pJTa3MQxTqXUYqXUBqXUVqXUv8KPj1BKXRG7aoVVJHQTgFLKppRar5Sa3cjnxyqlVtb7QZ4WfnyCUuq8mBbbsb0CXH6CY54CntBaj9Banw7MDD8+AmhR6Cql7C0tUFhP/qclhvuArUBWI59/FbhRa71RKWUDnOHHJwDVwLLmvpBSyq61DrSh1g5La71UKTXgBIf1Ag7Ue84mpVQS8CCQqpQaD8wA9gD/AFIAN3CH1tqllLoduA7IAGzAhZH+OkR0SejGOaVUH+BK4C/ALxs5rDuQD6C1DgJbwj/8PwKCSqnvAD8FcoA/AklACXCb1rowfGV8CjAQ2KeU+jPwcvg4A/i21npHNL6+DugJYKFSahkwF3hZa12ulHoAGK21/gmAUioLOF9rHVBKXQI8DHw7fI6RwHCtdakF9Ys2ktCNf08CvwUymzjmCcCllFoMfAa8qrXOVUo9B1RrrR8DUEp1AsZqrbVS6q7weX8VPscQYLzW2q2Umgn8Q2v9evgqzBaNL6wj0lq/rJT6HHMY4hrgh0qpMxs4NBt4VSl1KqABR73PzZPATVwyphvHlFKTgUNa67VNHae1fhAYjXnldCtm8DakD/C5UmoT8BvgjHqf+0hr7Q7/fTlwv1Lq/wH96z0uIkBrnae1fklrfQ0QAIY2cNhDwCKt9VDgKsxhhjo1MShTRImEbnwbB1ytlMoF3gImKqX+09CBWutdWutngYuBM5VSXRo4bCbwT631MOCHNPKDrLV+A7gacyzxU6XUxEh8MQKUUpcrpRzhv/cEugAHgSqOfjeTHX4c4PZY1iiiS0I3jmmtf6+17qO1HgDcDCzUWn/n2OOUUlcqpVT4P08FgkA5Tf8gf7+x11VKDQR2a62fAj4EhrfxS+kQlFJvYr5LcCqlDiilftDAYZOAzUqpjcDnwG+01gXAImBIeAbKTcBfgRlKqfXIMGC7oqSJeWJQSk0Afq21ntzA597CvLlSi/l29Q9a68+VUoOB/wIhzBtpnTHHf8uAhcAYrfWE8I20+mO/vwO+C/iBAuDWaI8hzlo3ysC8s9/7mI/OmKFj1PtQx/x33YcPyMP8xVL/I//ukWv90axfiOaS0BWxNW1KN+BszCvyQZizJgb57CH3q1ccGBalVw0Bhzg6iHcBK4C1d49c64nS6wpxHAldEV3TpgwExgPnhz+cDR3mDgVr/3PtwbRYlhbmBzZgDgssB5bfPXLtXgvqEB2EhK6InGlTDMw78edjzjEdr5Q6qblPf+GyfX6dfNTUKKvkY14FLw//ufLukWt91pYk2gsJXdE206ZkAddiLqC4QCmV09pT/WfM/jJ3L90pUqVFUCUwG3gfmHP3yLW1FtcjEpiErmi5aVMygKu11jcC3wovoGiz90/Nzys53d87EueKolrMedDvAB/dPXKtzGEWLSKhK5pn2hQFXKS1vhP4tlIq5URPaam53Q/t3TvW0z/S542iSszZIa8BS+4euVZ+mMQJSeiKpk2b0k9rfXtI8wObofpF86WWp5Xmbr6kekA0XyOK9mI2Hvrn3SPXFlldjIhfErqiYdOmDA2EQtNsSk1RSsVkEc0Wo2r/V5PL+sbitaLIDbwA/O3ukWv3W12MiD8SuuJo06acWesPPJJqt11Wb5VbTBwMuQs/vbaoRyxfM4r8wH+AR+8eudZldTEifkjoCgCCD1w70hsMPpJqt10S67CtUxUMVL81JS/DiteOohDmrIeH7x65dr3VxQjrSeg2IXyzaCmQjLkU9b9a66kNHDcWs+F0cvjjba31tPDSXZ/WutlNxGMt+MC1o73B4KNpDrvlTW1CWusXrtynlT02wxkW+AwzfL+wuhBhHQndJoSv+NK11tXhzlBfAvdprVccc5yLY3Zu0FpvObanQTNfMyY7NwQfuPbscNhOiPZrtcSr5+2v9HXVje2Q0V4sBn5898i1W6wuRMSehG4zKaXSMEP3Xq31ymM+V4YZtIfqPTYAczVTECiiBTs3AFHbucHzx6u7+4Khf2UlO66JxPki7Z3T8woqTg30tLqOGPADfwMekt4PHUt7fRsXMeFNITdgNkyZd2zghtXt3PA/pdQPlVIpWutc4DmObEL4BWZoj9Van4XZH/e39c4xBLhEa30L5jY7/9Baj8BsTn6Atpo2RR345eW/V6jceA1cgIwKW0dZbusA7gc2z1o3apLVxYjYkdA9Aa11MBx+fYCzlVLHdfmP950btvzh1jFlbt+2PlmpDyfbjdS2nCvaMqvsIatriLFTgM9nrRv15qx1o9rLzA3RBAndZtJal2M2mm5wi+143LnhpXlfOV6a99XzX199//zktLSBrTlHrGW77R31e/JmYNusdaN+NGvdKEtmj4jY6Kjf4M2ilOpW18BFKZUKXApsa+C4uNu54e8fzr/ASMs4kNNv4D1JOV2y5p1x/cETP8t6WX5HPHQZs0oO8Czw1ax1o6LVW1hYTEK3ab2ARUqpr4HVmGO6sxs47ruYY7obMNfh3xbeCv1jYEp4C5bzgWnAu0qptUBxE697I+aWLhswWyX+u7kFP/zmh45n5i2f1e/U0xZlde3eve7xwBnj+23P6HuoqefGg+yQPa6HP2LkXGDtrHWjfmZ1ISLyZPZCO3L/c6+eNmjYWbNzunY7paHP+8uKSq+bMy3HoXTc/rIN6FDwpav225Qh77DD/gPcI93M2o+4/eETzTdm4iR1/3P/vvOMs8etaSxwARydunVeMPjKfbGsraXsyrA5qgzZYvyI72AONwywuhARGRK6CW7MxEmOKXf/+MWzLpg4KzU9Pf1Ex1efdVnf/SndorrJZFullhrSJPxoZ2EON1xqdSGi7TpU6Cql+iqlFimltiilvlFK3dfIcU6l1OLwWOxWpdS/wo+PUEpdEduqG3ftXT/u9t1f/3HZaSPPvsNmszXr/6Vhs9mWnXdXIKiJ23Gl9Aqb1+oa4lBn4LNZ60b9zupCRNt0qNDF3J78V1rrIcBY4MdKqSENHPcURxY1nI451QtgBNCi0FVK2dtQb6O+/9sHRl3xnTvX9znl1NEtfa6jR9/uS/tdGLfDDBmV9qgvg05QBjBj1rpR781aN6q9NQbqMDpU6Gqt87XW68J/rwK2Ag1tnNiLeqvAtNabwlvSPAjcFL4CvkkpdbZSarlSar1SaplSygmglLpdKfWRUmohsEAp1UsptTT8vM3hmQytMmbiJHXvQ3+7+eLrb1nYtVfvZm/6eKzic6b0KnJkV7b2+dGUVWuXu2hNuw5YNWvdqMFWFyJarkOFbn3h3ghnAY0t612olJqjlPqFUipHa+0DHsDsIDZCa/025pzd88PLeh8AHq53jpHA9VrrCzFXqX0eXtl2JuaW3y02ZuIk2/mTp/zpwmuufzUjO6dNTWFsjqSkRWffXt2Wc0RLts8RlXcH7czpwLJZ60aNsroQ0TIdMnSVUhnAe8DPtdbHXe1prV/G/KZ+F5gArFBKJTdwqmzMebebMYO6/rLeeVrruhtWq4E7wo1thoWvsltkzMRJKeOvvPapcy+bPDUpOSUiG0Ha+zl7L+8xKu52N8gO2iO+/1o71QVYMGvdqPOsLkQ0X4cL3XCLxveA17XW7zd2nNY6T2v9ktb6Gsyx4ON6LgAPAYu01kOBq2h8We9S4ALM1WivKKW+15Kax0yclDV20pUvnvetq39ks0d2mez+827pWmFLjavZAlnKLuOVzZcNzJ21bpTl/ZBF83So0A0v1X0R2Kq1/nsTx10eDmeUUj0xrygO0vSy3tubOF9/oFBrPQtz/6yRza15zMRJ2WMnXfnChVd/++bmzlBoCXtKWur8s75TEunztkWyYUs2arXMYGi+dOCTv3xxhkwpSwAdKnSBcZhLdieGb2ptaGQK2CTMZbgbgc+B32itCzAb3gypu5EG/BWYoZRaj7mzRGMmABvDx92EucvECY2ZOCln9IUXz7rwmuu/bUQhcOuoU8/qu7HTaXnROn9rpJTa4nK8OV7V+ALbH1qy7U01XTXYkEnED1kGHKfGTJzUaWCf3v8aPKDf5D7nXlzVbeDgbtF8PX91ZdXVs/+UnKr9ERkvbqv/Dcw/WDzU3+rZGR1JfoG74uG1Lrs3FEoHPMDVeqqeZ3VdomEd7Uo3ITidzk7dMtL+e9rJ/a+z22wpB1cszCrcuTWqzWocGVmZc4denx/N12iJTJmr2ywHD7o9D69yZYQDF8z7Ch+q6WqclXWJxknoxhmn05nVp0+fx0adOexCwzAMAJthJBesWpKT79pcGM3XDgwZ1297Rr+46ESWVSOzxk7k4EG355F1LodXhWzHfCoVM3hPtaIu0TQJ3TjidDrTunbtOm3s2LG32O32o36QDMNIOrT2y855W74uiNbrK8NQ68f/wOHHCEbrNZory2M/NkhEPXl1gctxgVunC/Cpmq66xrIucWISunHC6XQ60tPT7xs/fvwdSUlJDfaUNQzDUbRhWdcDm9dHbRjA0albp/mDr7R87m52wBEXY8vxKD/P7XlkncveRODWGQR8pKYrmfccRyR044DT6TRCRuj7vjG+O5KTk5ts4m0Yhr1k06ru+zeuidpOEDUjJvXdn9Ld0k5kWdhP2DGtI8rPc3tmrHXZPYSaO/5yLvCamq5kaXWckNCNAz58NxScV/DLb3p/c+oL/hdqPD5Pkz0RDKVspVvW9ty7fmVUgtew2WzLxlnbiSzNsKXj05YPc8STgnyP55GWBW6d64FHolGTaDkJXYsppxpV5Cx6uLpf9ekA+Sn5nWeqmfay2rImb5oZStkqtm3otWf1V23fnr0Bju59ui/pf5FlnciUUiSX2Vq8XLq9Ksj3eGas2WZ3tzxw6/xWTVfXRrIm0ToSuhZSTtWPHtxfc1ZN//qP1zhq0v6Z8s+uuVW5TYaeUsqo2rn5pN0rl0ZlDLbk7Gt6FSVZ14ksrcyQLWqAggKPZ8ZaV1sCt87LaroaEImaROtJ6FpEOVUWSfyKc5mAwXE3RIJG0PZK5iv9VlatzG1qAYtSSlXv3tpn57JFEb8qNTuR3dHoyjBXcRUjnlt4+CNrxmyeXLHzqGNe/3o/w59dyLBnF3Lei0vZWFABQFGNl/EvLWXoMwv4YNuRxXDXvLWCvCozazMq7f6W1PvK9L386pKvmXbjluM+N/e1Qu4ZtY6qssan/7qrg/z2W5t441Hzd5jfF+IfP9nJtBu3sPidosPHvfbnvezdGpt2FQUFHs+M1S67WwcjMYcuB3hHTVdyk9JCEroWUE5lALdzLpNIo3NTx87JnDPgo5qPDgSDwUYDSCmlavdu77vjy/kRD15738G9l/Uc0+B5nV0z2fCjiWz40UTW3nMRaQ4bU07rfdQxJ3dKY8nt49l070T+dIGTe2ZvAODNzQf40eiTWXX3hTy5YhcAH7vyOatnNr0zzXuJmVX2UEtqPe+qzvxs5qDjHi8t8LFlRSWdezadNR8+m8fgs4702vlmeSWDRqTzwFuns+JT877i/u21hELQ//S0lpTWKoV1gUtEArfOGMzl68IiErrWuJDBXMdJnNacg9dnrO/ziv+VCq/f2+hVp1JKuffv6uda8vneyJVpOnDuzd0qbGlNXtot2FPEKZ3T6Z9zdBid17cLnVLNsBvbpxMHKs2rWIdhUOsP4g2EsBmKQCjEkyt38dtxR+bzZ3ta1lFt8MhM0rOPn0X1zt8P8O37TqKp+/d7t9ZSWRpgyNgjbYptdoXPEyIY0NS92/jw2XyuubdXS8pqlUOFUQncOvfJ+K51JHRjTDlVX7K5hxGc05Ln7U/Z3/VpnlYV7oqixo5RSuHL39t/28JPIxq89pTU1Hkjb2uyE9lbmw9wy9A+TZ7nxfV7+dagHgDcOqwPH7ryufS1Zdw/3skzq/fw3eF9SavXvzzb3/a5uhsWl5PTzUHfwY1fmYZCmnefOMANPz+61cOQc7IoyfPxyO0uJt7cnQ1Lyul3Wio53aL77vxQocczY5XLXhudwK3zspquep/4MBFpEroxpJwqBcW9jOcC7LR4wnqlozJ9ZvLMTvur9zc5Y8F/6ED/rfNn57a60AYYg87qu6HT6Q1OUfMFQ3zkKuCGIY3/DC/aU8SL6/fy6CVmn/fsFAef3Houa+6ZwMhe2Xy8vYDrh5zE3R+t5/p3VrF8fylZ2t6m9/Bed4hPXyrg6h81nS2L3y1i6LgsOvU4OkxtdsVdD5/Mn944nVGXdGLBG4eY9J0evPP3Azz3291sWFLelvIaVHTI65mxymWriW7ggjm++3SUX0M0QEI3RpRTKeAGhjOBbFp9hREwAvYX01/ss65yXW6TxxXnDfhm7odNHtNSrvO/n+VWDt+xj8/ZUcjIXtn0yGj498jXhRXc9fF6Prx5LF3Sjr9KfGipiz+cP5g3Nx1gfL8uvHrtSKYt2UaGYc/Qoda3wSs64KUkz8dDt2zl95M3U3bIx59v20pF8dHD47u/rmHR20X8fvJm3n3yACs+KeH9p47+/bLk3SLOvbILuzfVkJph454ZJzPvP5FtU2EG7jZbDUFHRE/cuGvVdPXtGL2WCJPQjZ1hZDAZJ2e1+UwKPsr6aMCcqjn7Q6FQo7fjQ6WFAzbPeT+3za8X5kjPypw79IbjliC/2cTQwr6KWq57exWvTRnF4C7Hbwixo6SaA5VuJgzoRq0/iKFAKXD7g9iUMpIqVKv76vY5NZXH5w9nxuyhzJg9lE7dk/jj66eT3fXoTLvrLyfz6KfDmDF7KDf8vA9jr+zCdT87MtRQUxng6y8rGDu5Mz5PyBwbVuD3tug+X5PqArdaxyxw68xU01VOjF+zQ5PQjQHlVKnAHZzL8NYMKzRmZebKvq95Xiv1+X2N3uTSFcUDNn3ybm6kXjMw5Lx+rsz+hxdu1PgCzNt9iOtOP3Lx/tyaPTy3Zg8ADy5xUeL28X+fbGTEcwsZ/a/FR53vDwu38JeJQwC4ZVgfnl2Ty5hZS7jvnIEApJbZmj03a9b9e3jkdhcFuR5++61NfPlBcaPH5m6p4d8PNm/oe/asAq64syeGoTjj3Cx2rK9m+k1bGXtFkxNPmq3YusAFc+frxyx43Q5LmpjHgHKq6xnADziPqHT17+TrVPmD0A/8GSkZXRo7Rqdn5Q6/6uYBkXg9f3lx2ZQ507OSCEa9E9jsPoX780d6+0b7daxSXOT1zli5zaiyJnDrm6in6kUW19AhyJVulCmn6ouNqxjJ2dF6jbKksqynHE9lFtQUNNqLQdVUDtj44RsRmdXgyOnaaf7gK6Oy/PhYmVX2dtt/oaTI652xwhUPgQvwnJqupIlxDEjoRpFyKhvwPUZzGilNL4JoK5/Nl/R82vO9N1dubjRYDXd1/w3/e32vx13L/7vhCn55zSXcN3kCbz31twaP/2rOR9x35YXcN3kCT/zq/wA4uHsnv7nuMu6d9tf+/y0IVgAEQiEu+fdX1Pojv9lDdq29XXbHKin2eh9e4TKqCMRD4AIMBu6xuoiOQIYXokg51XjS+SmTmYKNmP1wjasat+/i9ItPMgyjwbf/fkfKPuekKX3TMzNVwO/nj7ddy533P8jgEaMOH5OXu5u//+KHTHvlXTKyc6goKSa7S1denjGNsZO+RfeT+vL8/b/wrrykS9Izq3apzGQ7t4/o39DLtckeXZM3/5qSdjWftKTYvMKt1HETuHUOAafoqVo2BY0iudKNEuVUmcCtjGZgLAMX4KvMr/q96XmzyB/wN9gwxuH39Nv++fsHQoGADgb8BAJ+jl2uNf/d17n81tvJyM4BILuLuQGB3WHH63bjdbtJyemU/HHXcw58vL2A753ZLypfS1bI0WR/4URTWuyL18AF6A782uoi2jsJ3eiZRDad6RWBKWKtsCNtR8/ngs/5arw1ZQ193vC5+947cXTgjvOGc+Z5FzD4zJFHfT4vdzd5ubu5/5ar+d1Nk1n/hXmP5fJbb+f952cy83c/57of/owXVu886Z4Lz6oxotQjO0vZj59nlqBKS3zeGSu2xWvg1vmVmq56Wl1EeyahGwXKqToBlzMaZ0MdxGKlJLkke6Z9ZtqhmkN5x37OMAx+8n//5/jxfT8/uH3jOvZt33bU50OBIPl79/Dgv9/jF48/w7N/+jU1lRV0692HB197jxlvf0xySiplxYeMkst/VP3d/63hpv+uZntJZN+ZOgzDYasiNi29oqisxOedsXybqojvwAXIAKZaXUR7JqEbHZfThWy6M9zqQjw2T/Kzac/22la1rcEbbFkpSSflpCRVrlk876iZ/l169mLMRZOwOxz06NOP3gNOIX/vnqOe+8aTj3DLff+PuQsX9xg14dJDf73kDKYvOTq8I6Elc3XjUVmJz/uwGbiJ0lLxLjVdHd+uTUSEhG6EKafqDlzKKIagLP73/QD4K+hntHor863+S6qW7NVah2pqanC73Xz99dc888wzrF6xPOvTV54P7fh6fQCgorSELWtX8vIj01g5fw6VZSXk5e7iraf+RmmhuRnxN6uW07l7T3oPGIjP46Z8+KXZVbZUd60/8jO80sptnoifNEbKShMucAHswC+tLqK9ktkLEaac6k66MIlJ3IjC2ulOuUAS8D/gx+ZDQ2qG5I8rH9fl448/TvJ6vRiGwbBhwzjppJP44MMPQ/c+9LguLSq0ZWTnsOPrDSx470169u3PiPMnkJKaxk0//TVaax78wc388u/PkZnTiQO7dvDkb36Mr6rC//YlAxzj+jW6RqNVFnYuzt01vnZARE8aA+WlPu/Dy1yqXPsTKXDruIH+eqputKudaB2ZDB1Byql6A+czjJMtD1yAAcAxt9G2pG/pVWIvKbv9ztvdqUmp2XWPu91uDKWM5PL8ArTq5vV4bDf/7Ncc2LWdP73wBg/ddSu/f/YVwGwhOfWltw+fs88pp/LY+3MBHGmfP32Q0m+O7pHYRpk1iTdXN8EDFyAV81f1NIvraHdkeCGyLiEZ6MEIqwtpSmFyYaeZxsykktqSgrrH1q1bx6BBg3AoeuYEa0tWzJ0devDOm7nuhz/lszde4YKrv01y6ok7LW4f/92s2gY6kbVFttdu2c3I1igvS/jArfNjNV21qyl78UBCN0KUU2UBFzCc3tiI+x+2Wntt6tMpT3ffVbVr3549e1i/fj2XXnopAFlpqd0nX3xR8UP//q934JBhrFk8j3Mvm8yzf/o1f/vZ3bjWr2n0vI70rMy5w248rhNZW2QHHRFrEhRt5WV+74z2EbgAXYHbrS6ivZEx3QhRTnUZipu4jmtJppPV9RxWBrzB4THd4xRAyuspgbu+f5eta9euR72N94d08dode3LOvfxqe/7ePdgdDs69bDJ//ekPeODFNxt9Sa01Z3zyt8LTqnJ7ROJLcIeC7v9cezDur7gqyv3eGV9uo1T7k62uJYJ2Ak49VUeuj2UHJ1e6EaCcygFcgZOsuArcEykH3gbPDR77Fylf5B27+WVlWWnXPetXhQaefkat1+NGGQZKgc/b9GQCpRQbx/8gyYctIlMZUg1bqvLQop2BYy0cuKqdBS7AIGCi1UW0JxK6kTEMyOaUOBvL/S/wIlACPA6sA1aHPwCWYN6j/gQ2vrbxpBnPzzA8Pk9V3dMXLFjApEsvTdr28ZueUeMvrJ375qv89oYruPK7d53wpR05XTrNd07eH6kvJaXUqDrxUdaoKPf7wle47WFIoSHft7qA9kSGF9oovA3PA2TSh8n8IC5mLbRBhj+j9q7AXdU5qTnd6z/uD4bKnN+6ISm9c5f05p4rFAyGxnz85/L+7sI2d1h7f1B+XskQf9w1vqks9/se+cqli0O+9naFW18t0EMa4USGXOm2XT9gAKfTJ9EDF6DaUZ02M3lml71Ve4+6SnXYjE6uOe/6q4qLmn3FadhsxopxdweDmjb/Zs+otMXd8EJlRYcIXIA04Hqri2gvJHTbbjQQpBfDrC4kUoJG0PZy5st9V1euzq3/TshhM3J2fv5eqLKosLK553J0691t8YCL97W1pqxqe1y9Jaus8Pse+bJDBG4dGWKIEAndNgg3Kb+QrgRIJ6ILAuLBJ1mfDJhdM/tAIBg8fOfabjOyd839HxWF+RXNPU/Z2Vf3Lkzq1OygbkiWxxE3c3WrKvy+R7900YECF+BCNV1FvmFyBySh2zYDgQwGM9jqQqJlbcbaPv/2vVrq8XoOL3iw24ys3fM/tJXnHyxvzjkMu8Ox+Jw7atpSR3bAHhc3qarMK1yKQr64qCeGFHCb1UW0BxK6bXMOEKBH+xlaaMi+1H1dn1XP+spqyg93+7LbjIw9Cz92lB3c12C/3mM5+gzq9VWvc1o9zJClHc2+gRctVZUB36MdM3DrXGV1Ae2BhG4rhefmjqMHmlS6Wl1PtFUkVWQ8k/q0Y2/5vsPDCnabkb538afJJftzS5pzjoPn3ti93JbWqjaN6YYtnYB1E/SrKwO+R7/Ypg913MAFOFtNV+3+ez3aJHRbzwmk0J/o7FMTh/yG3/Fy9kvZ60rXH766tdmMtP1ffJZWvHdn8Ymeb09OTZk/6nulrXltQymVVGZYMmWpujLge+SLbRzqWGO4DTGAy60uItFJ6LbeSMBHV06xupCYUvBR5w87zSn7rDwYCmoAm2GkHvhqfkbR7u0nbANonDK8z/ouZzS6VXxT0sqNmDczr64K+B790tXRr3Dru8LqAhKdhG4rhBdEjMJGBZl0yDu6KzutyHmj5o1qr88bALAZRsrBFQuzCnduPXSi5+4Y973sGiPJ29LXzKiwR7R72YnUVAX8f13q0oVBb0e/wq3vMjVdSW60gfzjtU5PIJMBdEuEjmLRsitzV+bz+vlARW2lF8BmGMkFq5bk5Lu+KWzqefb0zIx5w24saOqYhmRW2WM2pltTFfA9utQVKghJ4B6jMzDW6iISmYRu6wwEoHcHG1poQGlyacozSU/bDlbk1QIYhpF0aO0XnfO2ft1kqAZPP7f/tsyTmwznY2W5HTH5fq2pDvgfXerSEriNuszqAhKZhG7rjAZq6BwO3w7Oa/faX8ialbapdHMNgGEYjqL1y7oe2Ly+0b66Sik2nn9nizqRZfvsUd9Jt6Y64P+rXOGeyDirC0hkErotFJ4qdgYOqkijl9X1xAutNO91/m/6gtKFNSGtQ4Zh2Es2req2/+s1x23/XseR3aXTPOfVze5Elq3tJ966og1qw4GbL2O4J3K2mq7iZoVgopHQbbn+gI3edLN8t9849EXnpelvV73tCwQDXkMpe+k3a3vsXb+y0dkKnhET++1N69mseb4Zhj1Dh6LTgqG2RgK3BTKhfS8IiiYJjZbrCyi6yVVuY1xZ21L+5f2XqvW5Kw2lbBXbNvTKXbPsQEPHKsNmrDjvrlCQE3cisyvD5qgy2rScuCG1NQH/35ZsD+VJ4LbEGKsLSFQSui3nBNxkS+g25VDaoaSZ6qmMYndJkVLKqNyxqffuVV80OJTg6Na726IBlzRriXBqaWTn6tbWBP2PLdkePBj0SOC2zCirC0hUErotNwioIkNC90TcDrfxTPLT3XZU78pTShnVu7b02bV8cYPBWz7mqt4FSTkn7FyWXm5req+gltRXE/Q/tsQVPBD0JMzGl3FEQreVJHRbQDlVOtAZGz5S6X7CJwhCRojXM17r/WX1snyAmlxXnx1fLjjuqtawOxxLxt51wqvYzCp7RPZdc9cE/X+TwG2LYbJIonXkH61legIhetINA7l72wLzM+b2et/9QXFIh3zu/Tv7uZbO3XvsMY6TBvb6stfYJocZsmrtbd6dw10b9D+2VAK3jZIx72+IFpLQbZlegI0ucpXbGpvSNnZ70f+S1xvwVfjycvtvWzTnuODNP/fG7uW29EaveLN9DntbanDXBv2PLd4e3B+QwI2ADr84qDUkdFtmMOAhI4G2WY8zeckHs/6p/plc7q0o9Bfu7791/uyjgteWnJIyb/R3G+1ElhW0tzosPbXBwONLtgf3B90SuJEhodsKErot0x+oIU1Cty2q7VUpMx0zu+W69+8LFOf1/2buh7n1P28bOLzPui5DG5zbm6UcGa15TU9tMPDYku2BfQEJ3AgaZHUBiUhCt5nCncW6Ax5SyLG4nIQXNALGK6kv9ltVuyY3WFLQf/Oc94664t05/nvZNUbycZ3Ikg0j2ailRTMYPO5g4PElOyRwI0+udFtBQrf50jBvHgRIIsvqYtqLT9NmD/jY+0leoKyo98aP3jrcq8GelpExt5FOZKmlzV8g4XEHA48v3hHYG6iVwI08Cd1WkNBtvk6A2VpQQjei1qWsOenVwKsVgcrijHXvvXZ4V4rQ6WP7b80aeFzwpjZzrq7XHQz8XQI3mgZYXUAiktBtPjNo00nFoE130MXx9iXt7fpPnjbc1YeCa955qQbMTmRfj78zxYsRqH9sZqU90PBZjqgL3FwJ3GjKlrm6LSf/YM2XBRhkENVOVx1Zpb0i/SnHUzmHqvbWrnlrVk0oFMKR3Tln/mnXHNW3Iaum6d95Xk8w8MSSHYE9ErjRpoBsq4tINBK6zWd+cyV13J0iYiFg+O0vps/q9k3FusCGt14oCwWD2nPmxH576nUiy/LYG12Y4vUEA08s3hHY7ZfAjZEcqwtINBK6zdcJ8EvoxsZH2R9mL6j6LGXjuy8U6JBWq8bdrQPhPmTZAUeDzWm8nlDgicU7JXBjS6ZPtpCEbvOlA0EcErqxsiJ7RepbNa912vjeC7m2nK6dP+k9rgogC3v6scd6PaHAk4t3BHb7ayRwYyvH6gISjYRu86UioRtzO9N3pzzjmdlr9fvP7naPuTptf1J2TZphS1M+ffhmms8bCvxj8Y7ALglcK+RYXUCikdBtPgldi5Qml6X8w/94/6UfPFm6YMRNfqUUyWW2KjAD98lFO/w7JXCtkml1AYlGpj41nxm6dgldK3htfsez+tluJWsri/v1Ois3tSwvuTInEPyHGbipVtcnRHNJ6DZfChCUfdEspOCd0Otdy8ourr7BZ3f8I/cb3w4JXKtFZ9O6dkxCt/lSAQ8hItJEW7TePO+CAYvchAKG/AKMAyGrC0g08k3bfAagJXTjgwRu3JDQbSH5xm2+AKAkdIU4ioRuC0noNl8AMAhK6ApRj4RuC0noNp8f80r3hM1WhOhA5OehhSR0m8+PXOkKcaySEx8i6pPQbT5zeMGPz+pChIgjxVYXkGgkdJvPHF6optrqQoSII0VWF5BoJHSbrxawU0GV1YUIESc0MrzQYhK6zVcCJOHGSwi/1cUIEQfK9FQt9zhaSEK3+YoBBwB+GWIQAhnPbRUJ3earoG6duU+GGIQACq0uIBFJ6DZfNUdCV650hYAdVheQiCR0m+/I1a2HcuvKECJuuKwuIBFJ6DZfFebup1Ap02SEQEK3VSR0m6/uSteghEOWViJEfJDQbQUJ3WbSLh0E8oFUCimS1s2igwsAu6wuIhFJ6LbMHiAdL358Mq4rOrTdeqqW+eqtIKHbMrsxt+2BWhliEB3aVqsLSFQSui1TSN20sWoJXdGhrbC6gEQlodsyh6ibwVBGvrWlCGGpr6wuIFFJ6LZMKRAEbOxjr9XFCGEJjR9YbXUZiUpCtwW0S4cwb6ZlUEkNbll7LjogxTo9VXusLiNRSei23AYgC4AKcq0sRAiLLLO6gEQmodtyR+YmFssQg+iQJHTbQEK35fZi3kxT7JMrXdHBaDTwhdVlJDIJ3RbSLu3BHNfNpJxqPNI5X3QgitV6qpaWjm0gods6G6gb1y1jp6WVCBFbH1ldQKKT0G2dndTN190nK3NEhyKh20YSuq2zBwgBNnazDz81VhckRNRpcvVUvcnqMhKdhG4rhMd1NwBd0GhK2WZxSUJEn5Kr3EiQ0G295UAqAPvZYm0pQsSEhG4ESOi2ngtziMFgJ7kEcFtdkBBRoykCllhdRnsgodtK2qVrgK+BzoQIUSpd9EU7pnhdT9UBq8toDyR022Y5kAbAbtZbW4oQUfWK1QW0FxK6bbMVs7+uwW724ZYNK0U7FGKTnqo3Wl1GeyGh2wbapauAtUA3AA6wxtKChIgGg2etLqE9kdBtu4XUbeGzmY2EkH2jRPuhcQOvW11GeyKh23bbgRIgHTdeitlsdUFCRIzmTT1VV1pdRnsiodtG4cbmc4DOAGyXIQbRTmg0Bo9bXUZ7I6EbGasxb6jZ2Ece1Ry0uiAh2izI53qqloU/ESahGwHapSsxd0c1b6jtlH6joh2wM83qEtojCd3IWQQkA7AFF27Zol0ksADL9VS90uoy2iMJ3cjZDewAugKwS652RQKzMdXqEtorCd0I0S6tgQ+ADAA28Y0slhAJKcA3epqeZ3UZ7ZWEbmRtxey12xmNZgeLLa5HiJaz8TurS2jPJHQjKHy1+x51W/lsZgu1yH5SInH4WKmn6dlWl9GeSehG3jeY47vmvN3NfGZpNUI0l0aj+JHVZbR3EroRFr7afRfzalexk1xKpMm5SAAe3tN/1husLqO9k9CNjm2YvXZ7ALCaudKTQcS1IF4c/MTqMjoCCd0oCF/tvgkkAXZKqWA/X1pclhCN8/KkfkjL/YcYkNCNEu3S+cAnQG8AVrEML+VW1iREg/wUkiarz2JFQje65gDVQDp+AmyVm2oizmjAzR16qvZYXUpHIaEbRdqlazF7kXYHzOXBJXxjaVFC1FfJB/pxPcfqMjoSCd3oW43Zc9cM3i/5BB9VllYkBICXMtx83+oyOhqltba6hnZPOVUfYDpQCPhwMohR3GZxWfGlGHOiXZ0y4CLAjTkXRAHpwLXULT05Yg8cNXBTDFwPnI65VKUQGAxcEv78EsxfgadH8gtIMBoo4nr9tH7P6lI6GgndGFFONQm4DTMiYCJX0pPRlhYVr0LA48DdmBshpYQfXwEUAVc18dxa4Cngl0ApsBK4Bvg3cCPgBz4Gbo1G4Qmkgo/13/XVVpfREcnwQuwswOzNYM7d/Yq5eCi1tKJ4VbeeL4cjgQtmYKoTPHcLcCrmZD0bEMAM8WD4uYuACRGtNvF4KMbLd60uo6OS0I0R7dJB4EXMKEjFi591/A9NyOLS4s9mYGi9/14A/B1zuclFLXhuNyANeB5wYl75auom8XVMQQIUcIN+WldYXUpHJaEbQ9qli4BXgZ6AIpcD7GKuxWXFlwDgAs6o99jFmMMFw4FVTTy3CjgEDKr32LeAe4HzMK9yLwKWAu8AayNWdeLYz2P6Zb3Y6jI6Mgnd2FsGrOHIoomVFLHR0oriyU6gF3VdiY82DJrsYvENcBrme4ljbQuf14d5xXtj+Fy+thSbYA6xlGX8weoyOjoJ3RgLLxF+BfP+fBcAFjObGvIsLCt+bMIM1zol9f7uom5fjuY9t04Q8ybcOMwr6bpx4bqx3o6gijy2cE1492phIQldC2iXrgJmYt4mSsNPgKW8jZ8ai0uzlg/zJlr9qVzzgaeBZ4BdmMMFAAeBD+sdVwZUAv0bOO8q4EzMm2s9MG/IPYP5XiM1cuXHLT8ecrlOL9LlVpciZMqYpZRTjQZ+BuwFggykH2fzfQz5ZSgiJEgAF/fqt/ULVpciTPLDbSHt0mswr9f6AbCbfbj4GPk9KCIhhGYbM9nAi1aXIo6Q0LXeB8B6oA8A69nAbpnRICJgB++xkd+H7yOIOCGha7Hw/N0XMCc7mQsnVrJc+u+KNsllKWu5Q7u01+pSxNEkdOOAdulqzIWvtdTdn/+CBRSwxsq6RILK42uWcV34+0rEGQndOKFdugR4DHPNVCcAFvGptIIULVLAdlYyOfz9JOKQhG4cCe828RjmVLIsNJr5vE8Z2ywuTSSCg7hYzGS9Qe+3uhTROAndOKNdei/mUEM2kE6QEJ/zjqxaE03az1aWcrPeqndYXYpomszTjVPKqYYBv8BctGqOzV3E5fTiHCvrEnEol00s41bt0putLkWcmIRuHFNONQSz1UsVYHaFGs8E+nGhlXWJOLKb9azgNu3SW60uRTSPhG6cU051CvBrzMWrZv/dszmHU7j8hL1lRfulCbGV5WzgTu3S260uRzSfhG4CUE7VF/gNZv+sIgCGcQZncA0GDitrExYI4GUN89jNfdqld1tdjmgZCd0EoZyqB2bwZgAFAPShJ2O5mSSyraxNxJCbCr7kA4qYGr7pKhKMhG4CUU7VGbgP6AvsBzSZpHEhN5DFAEuLE9FXRh5LeZUaHtMuLVs9JSgJ3QSjnCoF+B4wHjN4/RgYXMAkesvMhnZrP9tYxkyCvKRd2mN1OaL1JHQTkHIqA7gUuAVzw3FzStkIzsTJldhknLfdCOBlIytw8TjwiTQhT3wSuglMOdUZwE8w90Awb7B1pzNjmUJGuGuZSFxVFPAlSyjjce3Sq60uR0SGhG6CC99g+xnmPgj7gRAGirGMpx8TpCF6AtKE2M1GVjEXzT+1Sx+wuiQRORK67UB4nPc64DLM4YYqAPrSi9FcR2qTO4uJeOKhjJWs4CDvAm9pl3ZbXZKILAnddiQ83PBDIA1zFzGNAzvjuJhenIOS5RRxK0SAPWxgLesIMAtYK83H2ycJ3XZGOVUmcBtwHuZ83lrAvOo9iyvJ4CQLyxMNKWcPK9hAKcuAl6UtY/smodsOKadSwBjgDsxVbPkQ3nltJCMZxMXYSbOuQgGAjyo2s5ptuIA3gOXhnUREOyah244pp+oE3ACMw9ykvByAVJI5mwvpxTlyo80CIQIcYCOrceFlAfCedukKq8sSsSGh286Fr3qdwPeBnkAhYE6u705nRjCBLgyV8d4YCBGkkI2sYQdV7MEcSpBmNR2MhG4HoZzKAZwP3AjYMYcczLeyPejCmVxAF4ZJ+EaBRlPE16zFRRkVwHvAQu3SfqtLE7EnodvBKKfKAiYDF2MuqiigfvgO53y6MlzCNwI0IUrYyjq2UEwlMBf4XIYSOjYJ3Q5KOVVXzHm9EzFvshUAAcAcdhjKuXRjODaSrKsyQQVwk886vmYvFXiBhcAcmZUgQEK3w1NO1QWYBFwSfiifuvBNIYmhDKcPo0mjh0UlJo5aCtnLGjZxKPwv+BUwW7t0obWFiXgioSuAw+F7KWb4GtTfmw3gZPpyKqPpzBkY2KypMg4F8VGGix1sYQ+1mL+wFgBLtEsXWFydiEMSuuIo4cUVo4ArgG6AFziEOf4L6aTi5DR6MYRMBnbIKWchglSwgwNsZhvF+EkBSoCPgTXapWssrlDEMQld0aBw+8hTMW+4jQIUx179ppHCYJz0YgjZnNKur4BDBKliL3lsYhsHcJOG+W/yDfAZsFUWNojmkNAVJxReZDEGmAD0wrzxVkZdYx0wx39P5VS6MYAcBpCS4E12NOCmkDJ2U8Bu9lCEj2zMoD0ALAY2yM0x0VISuqLZwgstegDDgAvgcB+HCqCSuqXGAFmk058BdGMA2QyI+05nmhAeiqkijyJ2s4dcKrEDmeEjioFFwHqgUJrRiNaS0BWtEg7gbpgBfC5wcvhTGjOAq6gfwumk0IPudKYHWXQnne6k0h07KTEu3VyGW0sh1eRTTj7FFJDPofDYbA4cnqPsAtYC24EDErQiEiR0RUQop0oF+mGOA5+FGcIaM8BqgRrM5cdHf8N1IosudCGdTFLJJIVMkskgiUySyMROGgp7s8aLQ4QI4SGIlwAefFTgoYJaKqihgioqKKeCCqqBVMydldMwbxIqIA9YA2wDcmUvMhENEroiKuqFcH9gEDAA6IoZcEb4z1rAhzlD4sRLYu3YsGPDgR07NgwM/AQOfwQ49kZWEpAMpIT/tHEkYIuBnZhXsXlAvnbpKoSIMgldETPKqZKB7uGPkzCDuHP4IwPzKrjuG1Jx5G1+3Z+hescozBBVxzxed7yBOcRRhDnlLQ8zaAuAAtmRQVhFQlfEBeVUdszgzQz/mY4Zqvbwnw7MK9ek8H/XXSH76v3dC7gxx5QrpKGMiEcSukIIEUMdbzWREEJYSEJXCCFiyG51AUJEklIqF/MGWhAIaK1HN3CME3gec05uMvCF1voepdQIoLfW+tOYFSw6HAld0R5dpLUubuLzTwFPaK0/BFBKDQs/PgIYDTQ7dJVSdq11oLWFio5HQld0RL0w+ycAoLXepJRKAh4EUpVS44EZwB7gH5jzfN3AHVprl1LqduA6zFkWNqXUzcDbQBbmz9S9WusvYvj1iAQioSvaGw3MVUpp4Hmt9b8aOOYJYKFSahnmFjova63LlVIPAKO11j8BUEplAedrrQNKqUuAh4Fvh88xEhiutS5VSv0K+Fxr/RellA1ke3vROAld0d6M11ofVEp1B+YppbZprZfWP0Br/bJS6nPgcuAa4IdKqTMbOFc28KpS6lTMMHfU+9w8rXVp+O+rgZeUUg7gA631hgh/TaIdkdkLol3RWh8M/3kI+B9wdiPH5WmtX9JaX4O528PQBg57CFiktR4KXAVHNec53Kg8HOoXAAeBV5RS34vE1yLaJwld0W4opdKVUpl1f8fc+21zA8ddHr4qRSnVE+iCGZhVHGnlCOaV7sHw329v4nX7A4Va61nAC5hDD0I0SEJXtCc9gC+VUhuBVcAnWuvPGjhuErA5fNznwG+01gWY/XKHKKU2KKVuAv4KzFBKrafpobgJwMbwcTdh3nwTokGyDFgIIWJIrnSFECKGJHSFECKGJHSFECKGJHSFECKGJHSFECKGJHSFECKGJHSFECKG/j+hsfRXnZWnDAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "labels = '5 Stars', '1 Star', '4 Stars', '3 Stars', '2 Stars'\n",
    "sizes = [reviewData.rating.value_counts()[0], reviewData.rating.value_counts()[1],reviewData.rating.value_counts()[2],reviewData.rating.value_counts()[3],reviewData.rating.value_counts()[4]]\n",
    "colors = ['green', 'yellowgreen', 'coral', 'lightblue', 'grey']\n",
    "explode = (0, 0, 0, 0, 0)  # explode 1st slice\n",
    " \n",
    "# Plot\n",
    "plt.pie(sizes, explode=explode, labels=labels, colors=colors,\n",
    "        autopct='%1.1f%%', shadow=True, startangle=140)\n",
    " \n",
    "plt.axis('equal')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  So I...\n",
       "1    ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I ca...\n",
       "2    ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I’ve...\n",
       "3    ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', \"\\n  I ha...\n",
       "4    ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  Brou...\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.body.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"['Reviewed in the United Kingdom on 9 March 2021']\""
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the latest review\n",
    "max(reviewData.date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"['Reviewed in the United Kingdom on 1 February 2020']\""
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the oldest review\n",
    "min(reviewData.date)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The Text Analytics Starts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Calculate the number of words in each review"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviewsBodyWordCount = reviewData.body.apply(lambda x: len(str(x).split(\" \")))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9    585\n",
       "9    566\n",
       "6    542\n",
       "7    509\n",
       "3    381\n",
       "    ... \n",
       "7     16\n",
       "5     16\n",
       "0     16\n",
       "1     15\n",
       "2     15\n",
       "Name: body, Length: 167, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewsBodyWordCount.sort_values(ascending= False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9    585\n",
       "9    566\n",
       "6    542\n",
       "7    509\n",
       "3    381\n",
       "Name: body, dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewsBodyWordCount.nlargest(n=5, keep='first')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    15\n",
       "2    15\n",
       "1    16\n",
       "7    16\n",
       "9    16\n",
       "Name: body, dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewsBodyWordCount.nsmallest(n=5, keep='first')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Calculate the number of characters in each review"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviewsBodyCharCount = reviewData.body.str.len() ## this also includes spaces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9    3145\n",
       "6    2989\n",
       "9    2957\n",
       "7    2762\n",
       "3    2095\n",
       "Name: body, dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewsBodyCharCount.nlargest(n=5, keep='first')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5    62\n",
       "1    64\n",
       "7    65\n",
       "2    65\n",
       "2    67\n",
       "Name: body, dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewsBodyCharCount.nsmallest(n=5, keep='first')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-processing starts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Convert all the words to lower case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.strings.StringMethods"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(reviewData.body.str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  So I...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 3 Septembe...</td>\n",
       "      <td>['2 people found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I ca...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 7 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I’ve...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 10 January...</td>\n",
       "      <td>['One person found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', \"\\n  I ha...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 2 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  Brou...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 13 August ...</td>\n",
       "      <td>['6 people found this helpful']</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  author                                               body  \\\n",
       "0     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  So I...   \n",
       "1     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I ca...   \n",
       "2     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  I’ve...   \n",
       "3     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', \"\\n  I ha...   \n",
       "4     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  Brou...   \n",
       "\n",
       "                                                date  \\\n",
       "0  ['Reviewed in the United Kingdom on 3 Septembe...   \n",
       "1  ['Reviewed in the United Kingdom on 7 December...   \n",
       "2  ['Reviewed in the United Kingdom on 10 January...   \n",
       "3  ['Reviewed in the United Kingdom on 2 December...   \n",
       "4  ['Reviewed in the United Kingdom on 13 August ...   \n",
       "\n",
       "                             helpful                  rating  \\\n",
       "0    ['2 people found this helpful']  ['5.0 out of 5 stars']   \n",
       "1                                 []  ['4.0 out of 5 stars']   \n",
       "2  ['One person found this helpful']  ['5.0 out of 5 stars']   \n",
       "3                                 []  ['5.0 out of 5 stars']   \n",
       "4    ['6 people found this helpful']  ['4.0 out of 5 stars']   \n",
       "\n",
       "                                               title  \n",
       "0  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...  \n",
       "1  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...  \n",
       "2  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...  \n",
       "3  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...  \n",
       "4  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...  "
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviewData.body = reviewData.body.str.lower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  so i...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 3 Septembe...</td>\n",
       "      <td>['2 people found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  i ca...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 7 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  i’ve...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 10 January...</td>\n",
       "      <td>['One person found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', \"\\n  i ha...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 2 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  brou...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 13 August ...</td>\n",
       "      <td>['6 people found this helpful']</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  author                                               body  \\\n",
       "0     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  so i...   \n",
       "1     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  i ca...   \n",
       "2     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  i’ve...   \n",
       "3     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', \"\\n  i ha...   \n",
       "4     []  ['\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', '\\n  brou...   \n",
       "\n",
       "                                                date  \\\n",
       "0  ['Reviewed in the United Kingdom on 3 Septembe...   \n",
       "1  ['Reviewed in the United Kingdom on 7 December...   \n",
       "2  ['Reviewed in the United Kingdom on 10 January...   \n",
       "3  ['Reviewed in the United Kingdom on 2 December...   \n",
       "4  ['Reviewed in the United Kingdom on 13 August ...   \n",
       "\n",
       "                             helpful                  rating  \\\n",
       "0    ['2 people found this helpful']  ['5.0 out of 5 stars']   \n",
       "1                                 []  ['4.0 out of 5 stars']   \n",
       "2  ['One person found this helpful']  ['5.0 out of 5 stars']   \n",
       "3                                 []  ['5.0 out of 5 stars']   \n",
       "4    ['6 people found this helpful']  ['4.0 out of 5 stars']   \n",
       "\n",
       "                                               title  \n",
       "0  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...  \n",
       "1  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...  \n",
       "2  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...  \n",
       "3  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...  \n",
       "4  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# all the body reviews are converted to lowercase\n",
    "reviewData.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Remove the punctuations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviewData.body = reviewData.body.str.replace('[^\\w\\s]','')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Remove the Stop Words, which are the commonly occuring words like the, a, an, is etc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "stop = stopwords.words('english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviewData.body = reviewData.body.apply(lambda x: \" \".join(x for x in x.split() if x not in stop))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    nnnnnnnnnn n n n already phillips hue bulbs ar...\n",
       "1    nnnnnnnnnn n n n cant comment installation pai...\n",
       "2    nnnnnnnnnn n n n ive never fitted one healthy ...\n",
       "3    nnnnnnnnnn n n n combi boiler internetenabled ...\n",
       "4    nnnnnnnnnn n n n brought prime day sale got 95...\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.body.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### These are the most comonly occuring words in our corpus. Depending on the requirement, we may remove or keep them. We are removing them here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = pd.Series(' '.join(reviewData.body).split()).value_counts()[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "n             843\n",
       "nnnnnnnnnn    167\n",
       "hive          102\n",
       "heating        89\n",
       "thermostat     80\n",
       "easy           78\n",
       "boiler         56\n",
       "install        53\n",
       "get            50\n",
       "use            48\n",
       "set            45\n",
       "app            45\n",
       "home           37\n",
       "works          36\n",
       "would          36\n",
       "control        35\n",
       "one            34\n",
       "great          33\n",
       "really         33\n",
       "hub            32\n",
       "dtype: int64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = list(freq.index)\n",
    "reviewData.body = reviewData.body.apply(lambda x: \" \".join(x for x in x.split() if x not in freq))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    already phillips hue bulbs around house big fa...\n",
       "1    cant comment installation paid local trader pr...\n",
       "2    ive never fitted healthy interest everything d...\n",
       "3    combi internetenabled 1 doesnt features upgrad...\n",
       "4    brought prime day sale got 95 fitted quite str...\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.body.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Lets see which are the rare words, we are removing them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = pd.Series(' '.join(reviewData.body).split()).value_counts()[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "attention       1\n",
       "anybody         1\n",
       "informationn    1\n",
       "activates       1\n",
       "aspect          1\n",
       "easytouse       1\n",
       "chat            1\n",
       "possible        1\n",
       "confusing       1\n",
       "270             1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "freq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = list(freq.index)\n",
    "reviewData.body = reviewData.body.apply(lambda x: \" \".join(x for x in x.split() if x not in freq))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Tokentization of reviews"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nltk.tokenize import word_tokenize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['cant', 'comment', 'installation', 'paid', 'local', 'trader', 'problems', 'using', 'system', 'think', 'brilliant', 'ive', 'major', 'issue', 'small', 'thing', 'schedule', 'next', 'morning', 'certain', 'time', 'didnt', 'come', 'planned', 'flawlessly', 'alexa', 'im', 'little', 'disappointed', 'screen', 'goes', 'seconds', 'save', 'battery', 'power', 'touch', 'see', 'temperature', 'check', 'overall', 'though', 'im', 'pleased', 'system', 'using', 'two', 'weeks', 'time', 'tell']\n"
     ]
    }
   ],
   "source": [
    "tokens = word_tokenize(reviewData.iloc[1,1])\n",
    "print(tokens)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'cant comment installation paid local trader problems using system think brilliant ive major issue small thing schedule next morning certain time didnt come planned flawlessly alexa im little disappointed screen goes seconds save battery power touch see temperature check overall though im pleased system using two weeks time tell'"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.iloc[1,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Doing Stemming, currenlty commented since we are preferring Lemit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    replac old 80 slide timer clock graviti fed st...\n",
       "1    purchas model opt self bit research onlin manu...\n",
       "2    purchas although reason savvi electr peer time...\n",
       "3    instal black fridaycyb monday offer kit ship e...\n",
       "4    love bought new combi realli best thing ive ev...\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 239,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#from nltk.stem import PorterStemmer\n",
    "#st = PorterStemmer()\n",
    "#reviewData.body[:5].apply(lambda x: \" \".join([st.stem(word) for word in x.split()]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Doing Lemmatization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    already phillips hue bulb around house big fan...\n",
       "1    cant comment installation paid local trader pr...\n",
       "2    ive never fitted healthy interest everything d...\n",
       "3    combi internetenabled 1 doesnt feature upgradi...\n",
       "4    brought prime day sale got 95 fitted quite str...\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from textblob import Word\n",
    "reviewData.body = reviewData.body.apply(lambda x: \" \".join([Word(word).lemmatize() for word in x.split()]))\n",
    "reviewData.body.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "sentimentString = reviewData.iloc[1,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# append to this string \n",
    "for i in range(2,len(reviewData)):\n",
    "    sentimentString = sentimentString + reviewData.iloc[i,1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### This is the Sentiment Analysis "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# the functions generates polarity and subjectivity here, subsetting the polarity only here\n",
    "allReviewsSentiment = reviewData.body[:900].apply(lambda x: TextBlob(x).sentiment[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this contains boths subjectivity and polarity\n",
    "allReviewsSentimentComplete = reviewData.body[:900].apply(lambda x: TextBlob(x).sentiment)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    (0.06315192743764173, 0.5116213151927438)\n",
       "1    (0.12410714285714286, 0.4767857142857143)\n",
       "2    (0.23928571428571432, 0.6821428571428572)\n",
       "3    (0.07947781385281384, 0.3274756493506493)\n",
       "4                  (0.04999999999999997, 0.45)\n",
       "Name: body, dtype: object"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allReviewsSentimentComplete.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "allReviewsSentiment.to_csv(out_folder + 'ReviewsSentiment.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## get the respective counts for all the reviews\n",
    "# if the polarity is above 0.75 Very Satisfied\n",
    "# if the polarity is between 0.5-0.74 Satisfied\n",
    "# if the polarity is between 0.2-0.5 Nice\n",
    "# if the polarity is between -0.2 to 0.2 Neutral\n",
    "# if the polarity is between -0.2 to -0.5 Dissatisfied\n",
    "# if the polarity is between -0.5 to -0.74 Very Dissatisfied\n",
    "# if the polarity is less than -0.75 Extremely Dissatisfied\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "allReviewsSentimentDF = allReviewsSentiment.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>body</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.063152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.124107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.239286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.079478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.050000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       body\n",
       "0  0.063152\n",
       "1  0.124107\n",
       "2  0.239286\n",
       "3  0.079478\n",
       "4  0.050000"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allReviewsSentimentDF.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a list to store the data\n",
    "grades = []\n",
    "\n",
    "# For each row in the column,\n",
    "for row in allReviewsSentimentDF['body']:\n",
    "    # if more than a value,\n",
    "    if row >= 0.75:\n",
    "       grades.append('Extremely Satisfied')\n",
    "    elif (row >= 0.5) & (row < 0.75):\n",
    "        grades.append('Satisfied')\n",
    "    elif (row >= 0.2) & (row < 0.5):\n",
    "        grades.append('Nice')\n",
    "    elif (row >= -0.2) & (row < 0.2):\n",
    "        grades.append('Neutral')\n",
    "    elif (row > -0.5) & (row <= -0.2):\n",
    "        grades.append('Bad')\n",
    "    elif (row >= -0.75) & (row < -0.5):\n",
    "        grades.append('Dis-satisfied')\n",
    "    elif  row < -0.75:\n",
    "        grades.append('Extremely Dis-satisfied')\n",
    "    else:\n",
    "        # Append a failing grade\n",
    "        grades.append('No Sentiment')\n",
    "        \n",
    "# Create a column from the list\n",
    "allReviewsSentimentDF['SentimentScore'] = grades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>body</th>\n",
       "      <th>SentimentScore</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.063152</td>\n",
       "      <td>Neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.124107</td>\n",
       "      <td>Neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.239286</td>\n",
       "      <td>Nice</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.079478</td>\n",
       "      <td>Neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.050000</td>\n",
       "      <td>Neutral</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       body SentimentScore\n",
       "0  0.063152        Neutral\n",
       "1  0.124107        Neutral\n",
       "2  0.239286           Nice\n",
       "3  0.079478        Neutral\n",
       "4  0.050000        Neutral"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allReviewsSentimentDF.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Neutral                93\n",
       "Nice                   43\n",
       "Satisfied              19\n",
       "Extremely Satisfied     9\n",
       "Bad                     2\n",
       "Dis-satisfied           1\n",
       "Name: SentimentScore, dtype: int64"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "allReviewsSentimentDF.SentimentScore.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(allReviewsSentimentDF.SentimentScore)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAFPCAYAAABUNZVHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZU0lEQVR4nO3de7RkZX3m8e9DNw5XBaXDIKiNCjheELFRFGMyokZkBEyIN1RUlKjRiDpRdGJ01EwwY1SWOkaQcRAZE7wNKI7KIEq8gDSIchNhgQiK0l4IRBK5+Js/9j72oXPaPt1dVbvfzfez1llVe1dV16/W6fOc97z7vaSqkCS1Z7OhC5AkbRgDXJIaZYBLUqMMcElqlAEuSY1aOss322GHHWr58uWzfEtJat7555//06patub5mQb48uXLWbly5SzfUpKal+Sahc7bhSJJjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY2a6UzMDbX86NNn+n7fP+bAmb6fJG0IW+CS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1KhFBXiSVye5JMnFST6WZIskuyY5N8mVSf4hyd2mXawkabV1BniSnYE/A1ZU1UOBJcCzgHcA766qBwK/AI6YZqGSpDtbbBfKUmDLJEuBrYDrgScAn+gfPxE4ZOLVSZLWap0BXlU/BN4J/IAuuP8JOB+4sapu7592HbDzQq9PcmSSlUlWrlq1ajJVS5IW1YWyPXAwsCtwb2Br4CmLfYOqOq6qVlTVimXLlm1woZKkO1tMF8oTgauralVV3QZ8CtgP2K7vUgHYBfjhlGqUJC1gMQH+A2DfJFslCbA/cClwFnBo/5zDgVOnU6IkaSGL6QM/l+5i5QXARf1rjgNeD7wmyZXAvYATplinJGkNS9f9FKiqNwNvXuP0VcCjJl6RJGlRnIkpSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1alEBnmS7JJ9I8t0klyV5TJJ7JjkjyRX97fbTLlaStNpiW+DHAp+vqgcBDwcuA44Gzqyq3YAz+2NJ0oysM8CT3AN4PHACQFXdWlU3AgcDJ/ZPOxE4ZDolSpIWspgW+K7AKuDDSb6V5ENJtgZ2rKrr++f8GNhxoRcnOTLJyiQrV61aNZmqJUmLCvClwN7AB6rqEcAvWaO7pKoKqIVeXFXHVdWKqlqxbNmyja1XktRbTIBfB1xXVef2x5+gC/SfJNkJoL+9YTolSpIWss4Ar6ofA9cm2aM/tT9wKXAacHh/7nDg1KlUKEla0NJFPu+VwMlJ7gZcBbyQLvxPSXIEcA3wjOmUKElayKICvKouBFYs8ND+E61GkrRozsSUpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNWrp0AUIlh99+sze6/vHHDiz95I0XbbAJalRBrgkNcoAl6RGLTrAkyxJ8q0kn+2Pd01ybpIrk/xDkrtNr0xJ0prWpwX+KuCyecfvAN5dVQ8EfgEcMcnCJEm/3aICPMkuwIHAh/rjAE8APtE/5UTgkCnUJ0lai8W2wN8DvA74dX98L+DGqrq9P74O2HmhFyY5MsnKJCtXrVq1MbVKkuZZZ4An+U/ADVV1/oa8QVUdV1UrqmrFsmXLNuSfkCQtYDETefYDDkryVGAL4O7AscB2SZb2rfBdgB9Or0xJ0prW2QKvqjdU1S5VtRx4FvClqjoMOAs4tH/a4cCpU6tSkvRvbMw48NcDr0lyJV2f+AmTKUmStBjrtRZKVX0Z+HJ//yrgUZMvSZK0GM7ElKRGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGrXOAE9ynyRnJbk0ySVJXtWfv2eSM5Jc0d9uP/1yJUlzFtMCvx14bVU9GNgX+NMkDwaOBs6sqt2AM/tjSdKMrDPAq+r6qrqgv38zcBmwM3AwcGL/tBOBQ6ZUoyRpAevVB55kOfAI4Fxgx6q6vn/ox8COa3nNkUlWJlm5atWqjalVkjTPogM8yTbAJ4Gjquqm+Y9VVQG10Ouq6riqWlFVK5YtW7ZRxUqSVlu6mCcl2ZwuvE+uqk/1p3+SZKequj7JTsAN0ypS7Vp+9Okzfb/vH3PgTN9PGtJiRqEEOAG4rKreNe+h04DD+/uHA6dOvjxJ0tospgW+H/A84KIkF/bn3ggcA5yS5AjgGuAZU6lQkrSgdQZ4VX0VyFoe3n+y5UiSFsuZmJLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjFrOpsaS1WH706TN9v+8fc+BM30+bNlvgktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRLicraa1cLnfTZgtckhq1UQGe5ClJLk9yZZKjJ1WUJGndNrgLJckS4P3Ak4DrgPOSnFZVl06qOEmapta7iDamBf4o4MqquqqqbgX+Hjh4MmVJktYlVbVhL0wOBZ5SVS/uj58HPLqqXrHG844EjuwP9wAu3/By19sOwE9n+H6zNubPN+bPBn6+1s36892vqpateXLqo1Cq6jjguGm/z0KSrKyqFUO89yyM+fON+bOBn691m8rn25gulB8C95l3vEt/TpI0AxsT4OcBuyXZNcndgGcBp02mLEnSumxwF0pV3Z7kFcAXgCXA/6yqSyZW2WQM0nUzQ2P+fGP+bODna90m8fk2+CKmJGlYzsSUpEYZ4JLUKANckhrlaoTSFCW55297vKp+PqtaND6juYh5V/pBSfI4YLeq+nCSZcA2VXX10HVtqCSv+W2PV9W7ZlXLpCW5GiggwH2BX/T3twN+UFW7DlfdZIz5+7epG1ML/HxW/6CsqYD7z7ac6UjyZmAF3bIEHwY2Bz4K7DdkXRtp2/52D2AfVs8neBrwzUEqmpC5gE5yPPDpqvpcf3wAcMiApU3SaL9/AElupsuQBVXV3WdYzp2MpgV+V5HkQuARwAVV9Yj+3Heqas9BC5uAJGcDB1bVzf3xtsDpVfX4YSvbeEkuqqqHretcy8b8/QNI8jbgeuAkuobiYcBOVfWXQ9U0phb4byTZHtgN2GLuXFWdPVxFE3VrVVWSAkiy9dAFTdCOwK3zjm/tz43Bj5L8Bd1fS9D98P9owHqmYczfP4CDqurh844/kOTbgAE+KUleDLyKbm2WC4F9gW8ATxiwrEk6JckHge2SvAR4EXD8wDVNykeAbyb5dH98CHDicOVM1LOBNwOfpvtz/Oz+3JiM+fsH8Mskh9EtnV10379fDlnQ6LpQklxE1w93TlXtleRBwH+rqj8cuLSJSfIk4Ml0f8Z9oarOGLikiUmyN/C7/eHZVfWtIeuZtCRbV9WgP/TTlOSRwOP6w1F9/5IsB46lu95UwNeAo6rq+4PVNMIAP6+q9un7ih9dVb9KcklVPWTo2iYhya7A9VX1r/3xlsCOQ/4nmqSxjbCZk+SxwIfoPs99kzwc+JOqevnApU1ckt/hzt2XPxiwnFEb40Se65JsB/wf4IwkpwLXDFrRZH0c+PW84zv6c83rR9i8HnhDf2puhM0YvBv4A+BnAFX1bWAUF/fmJDkoyRXA1cBX+tv/O2xVk5Nk9yRnJrm4P96zv64xmNEFeFU9vapurKq3AG8CTmA8w7UAlvZb2AHQ37/bgPVM0tOBg+j7FavqR6weota8qrp2jVN3DFLI9LyN7prT9/rhk08Ezhm2pIk6nq5xcRtAVX2HbhntwYwqwJMsSfLdueOq+kpVnTY/8EZgVZKD5g6SHMx4tq66tbo+vTGOsLm270apJJsn+c/AZUMXNWG3VdXPgM2SbFZVZ9HNWRiLrapqzXHttw9SSW9Uo1Cq6o4klye574j73V4KnJzkfXQXMa8Fnj9sSRMz5hE2L6W7ALYz3c5VXwT+dNCKJu/GJNvQjbA5OckNDDxKY8J+muQBrG5gHEo3LnwwY7yIeTbdRJdvMu8/T1UdtNYXNaj/QaGq/nnoWiZpzCNsxq7/i+lf6P6yPwy4B3By3ypvXpL7023k8Fi6JRGuBp7rKJQJSvJ7C52vqq/MupZJSvLcqvro2tadcL2JTVOS11XV3yR5LwtMx66qPxugrKlLsgPwsxpbwPCbX1Sbzc04HdKoulB6T62q188/keQddFfFWzbXHzyai3pzkny1qh63wJoTAWrItSYm4NL+duWgVUxRkn2BY4Cf013IPAnYga4v/PlV9fkh69tYa2s8Jd2yS0M2nsYY4E+iG4o23wELnGtKVX2wv/2vQ9cyBc8HqKrR/XICngl8Ftiuqo4dupgpeR/wRrouky8BB1TVOf0kuo8BTQc4sFV/u8n9/xxNgCd5GfBy4AFJvjPvoW2Brw9T1eQk+W3rLVRVvW1mxUzex4FHJjmzqvYfupgJe2SSewMvSvIR1lgtcyTLHC+tqi8CJHlrVZ0DUFXfnWulNu4B/e2lVbVJzbkYTYAD/5tu0sBfA0fPO3/zSH5IFrqavzVwBHAvuj9dW7VZkjcCuy/Ux994//7fAWfSLWd8PncO8LEsczx/Ytm/rPHYGPrAn5rkaLox4JtUgI/xIuZ9Fzo/pmGF/TKdr6IL71OAv62qG4atasMl2YNustVRdIF3J2PoNkrygap62dB1TEOSO+gaGAG2BG6ZewjYoqo2H6q2SUjy34GXANuw+rPBJnCNZowBfhGrN3bYAtgVuHwMa6H0uw69hm6I1onAsVX1i2GrmpwkB1TVaKZez9ePH76uX5vn94E9gY9U1Y1D1qXFS3JqVR08dB3zjWomJkBVPayq9uxvdwMeRbecbNP6VsB5wM3Aw6rqLWMK797uSe6ezglJLkjy5KGLmpBPAnckeSDdWOL70HX7qR3PSbIZ/GZdlIOSDPrXxeha4AsZw84nSX4N/Ipu6u7YhtoBkOTbVfXwJH9AN3PxL4CTqmrvgUvbaEkuqKq9k/w58K9V9d4k35rbVUmbviTn0y11vD3dUrLn0S3/cNhQNY3pIibwbzZY3QzYmxHsfFJVo/traQFzF/ieSte9cElGMowBuC3Js4HD6faKhG61RbUjVXVLkiOA/9FP0LpwyILGGArbzvv6d8DpwCbVb6W1Oj/JF+kC/Av9xdpfr+M1rXgh8Bjgr6rq6n5d95MGrknrJ0keQ3cN6vT+3JIB6xlvF0qSrarqlnU/U5uKvn9xL+Cqqroxyb2AnftlO6VB9ct0vBb4WlW9o18b5aghl0MYXYD3vyFP4C6w88lYJHlQP+ljwb7uqrpg1jVNSpJTquoZ80ZH3UlV7TlAWRqJMQb4ucChwGlzF4iSXFxVDx22Mq1NkuOq6sgkZy3wcFVVsxtSJ9mpqq5Pcr+FHq+qMe0WNUpJ3lNVRyX5DAv/Eh5spdPRXcSEbueTNa59jW3nk1GpqiP7uwfM7fU5J8kWC7ykGVU1t170y9eyyFrTa/TcRcxdq3jnoFUsYIwXMe8KO5+M1UJr1jS/jk3vSQucO2DmVWi9VdX5/d29+l2+fvNFd81mMGMM8JfS7XQyt/PJXoxv55NRSfLvkzwS2DLJI5Ls3X/9PqtXgmtSkpf1/d97JPnOvK+rAS/OtuXwBc69YNZFzDe6PnC1J8nhdD8IK7jzutk3A/+rqj41RF2TkOQedBM/xrrI2uj14/efAzwO+Md5D20L/HrIFTRHE+AjX271LiHJH1XVJ4euY5qS/A7dGj3AuBZZG6v+AvSuLPBLGPhOVQ22sfGYAvy1C5z+zXKrVbXNjEvSBkhyIPAQ7hxybx2uoslI8jTgXcC9gRuA+wGXjWGRNQ1nNH3gVfW3c190iwVtSTf77e8Zx5rLo5fk7+h2sHkl3bT6P6YLujF4O7Av8L2q2hXYHzhn2JK0PpLsm+S8JP+c5NYkdyS5aciaRhPg0C23muTtdBeHlgJ7V9XrW14r+y7msVX1fOAX/RrgjwF2H7imSbmt3519sySbVdVZdH3+asf7gGcDV9A1EF8MvH/IgkYT4HeR5VbHbm43l1v6bchuB3YasJ5JujHJNsDZwMlJjmXhXZa0CauqK4ElVXVHVX0YeMqQ9YypD3z0y62OXZI3Ae8FnsDqls2HqupNw1U1GUm2pvsFtRndYkj3AE7uW+VqQJKzgScCHwJ+DFwPvKCqHj5UTaOZiXkXWW51lJLsA1w7N1Kob6leBHwXePeQtU1KVf0SIMn2wE3AxYZ3c55H9wv4FcCr6Tbl+KMhCxpNC1ztSnIB8MSq+nmSx9NdeH4l3SSs/1BVhw5Z38ZI8lng6Kq6OMlOwAV0Y93vDxxfVe8Zsj5tmP4X8X2GXinTVqs2BUvmTWp5JnBcVX2y7zp54IB1TcKuVXVxf/+FwBlV9TS6ESkvGq4sra8kX+63/Lsn3S/i45O8a8iaDHBtCpYkmevO2x/40rzHWu/mu23e/f2BzwFU1c2MZ7OKu4p7VNVNwB/S7Rj1aLo+8cG0/sOhcfgY8JUkP6W70PePAP0GwP80ZGETcG2SVwLX0W3v93mAJFvilmqtWdp3gz0D+C9DFwMGuDYBVfVXSc6kGzL4xVp9YWYzur7wlh0BvJWupfbMqrqxP78v8OGhitIGeSvwBeCrVXVevyPPFUMW5EVMSWqUfeCStJ76kVODM8Alaf1l3U+ZPgNcmoEk9xq6Bk3U6UMXAAa4NCvnJPl4kqdmjQ1b1YYkWyeZy8yPJDkoyaAjibyIKc1AH9pPpJu8sw9wCt1uQ98btDAtWpLzgd+l22Hpa3SL591aVYcNVpMBLs1Wkv8IfJRuw5Fv0021/8awVWldklxQVXv34/q3rKq/SXJhVe01VE2OA5dmoO8Dfy7dgkg/oRvffhrdei8fp9uyS5u2JHkM3WqSR/TnlgxYjwEuzcg3gJOAQ6rqunnnV/Y7EWnTdxTwBuDTVXVJP5HnrCELsgtFmoEkKX/YNGEGuDRFST7DnTcYuZOqOmiG5WgDJHlPVR21tu/lkN9Du1Ck6Xrn0AVoo53U325y30tb4NKM9CsQ3reqLh+6Fm2YJMsAqmrV0LWAE3mkmUjyNOBCVi8nu1eS0wYtSouW5C39cseXA99LsirJXw5dlwEuzcZbgEcBNwJU1YU4dLAJSV4D7AfsU1X3rKrtgUcD+yV59ZC1GeDSbNxWVWtuTmH/ZRueBzy7qq6eO1FVV9GN63/+YFVhgEuzckmS59BtH7dbkvcCXx+6KC3K5lX10zVP9v3gg66FYoBLs/FK4CHAr+i2kLuJbmKINn23buBjU+coFEn6LZLcAfxyoYeALapqsFa4AS7NQJIVwBuB5cybf1FVew5Vk9pngEszkORy4M+Bi4Bfz52vqmsGK0rNcyamNBurqspx35ooW+DSDCTZH3g2cCbdhUwAqupTgxWl5tkCl2bjhcCD6IadzXWhFGCAa4PZApdmIMnlVbXH0HVoXBwHLs3G15M8eOgiNC62wKUZSHIZ8ADgaro+8ADlMEJtDANcmoEk91vovMMItTHsQpFmoA/q+wBP6O/fgj9/2ki2wKUZSPJmYAWwR1XtnuTewMerar+BS1PDbAFIs/F04CD6NTWq6kfAtoNWpOYZ4NJs3NrvSl8ASbYeuB6NgAEuzcYpST4IbJfkJcD/A44fuCY1zj5wacqSBNiFbibmk+mGEH6hqs4YtDA1zwCXZiDJRVX1sKHr0LjYhSLNxgVJ9hm6CI2LLXBpBpJ8F3ggcA3dSBRnYmqjGeDSDDgTU9NgF4o0G2+vqmvmfwFvH7ootc0Al2bjIfMPkiwBHjlQLRoJA1yaoiRvSHIzsGeSm/qvm4EbALdY00axD1yagSR/XVVvGLoOjYstcGk2rpx/kGRJv8CVtMEMcGk29k/yuSQ7JXkocA4uZqWNZBeKNCNJngm8n24c+HOq6msDl6TG2QKXZiDJbsCrgE/STeZ5XpKthq1KrTPApdn4DPCmqvoT4PeAK4Dzhi1JrbMLRZqBJHevqpvWOLd7VX1vqJrUPlvg0hQleR1AVd2U5I/XePgFs69IY2KAS9P1rHn31xwH/pRZFqLxMcCl6cpa7i90LK0XA1yarlrL/YWOpfXiRUxpipLcwer1v7cEbpl7CNiiqjYfqja1zwCXpEbZhSJJjTLAJalRBrgkNcoAl6RGGeCS1Kj/D6OQs/4nK51/AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "allReviewsSentimentDF['SentimentScore'].value_counts().plot(kind='bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[]</td>\n",
       "      <td>already phillips hue bulb around house big fan...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 3 Septembe...</td>\n",
       "      <td>['2 people found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[]</td>\n",
       "      <td>cant comment installation paid local trader pr...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 7 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[]</td>\n",
       "      <td>ive never fitted healthy interest everything d...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 10 January...</td>\n",
       "      <td>['One person found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>combi internetenabled 1 doesnt feature upgradi...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 2 December...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[]</td>\n",
       "      <td>brought prime day sale got 95 fitted quite str...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 13 August ...</td>\n",
       "      <td>['6 people found this helpful']</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  author                                               body  \\\n",
       "0     []  already phillips hue bulb around house big fan...   \n",
       "1     []  cant comment installation paid local trader pr...   \n",
       "2     []  ive never fitted healthy interest everything d...   \n",
       "3     []  combi internetenabled 1 doesnt feature upgradi...   \n",
       "4     []  brought prime day sale got 95 fitted quite str...   \n",
       "\n",
       "                                                date  \\\n",
       "0  ['Reviewed in the United Kingdom on 3 Septembe...   \n",
       "1  ['Reviewed in the United Kingdom on 7 December...   \n",
       "2  ['Reviewed in the United Kingdom on 10 January...   \n",
       "3  ['Reviewed in the United Kingdom on 2 December...   \n",
       "4  ['Reviewed in the United Kingdom on 13 August ...   \n",
       "\n",
       "                             helpful                  rating  \\\n",
       "0    ['2 people found this helpful']  ['5.0 out of 5 stars']   \n",
       "1                                 []  ['4.0 out of 5 stars']   \n",
       "2  ['One person found this helpful']  ['5.0 out of 5 stars']   \n",
       "3                                 []  ['5.0 out of 5 stars']   \n",
       "4    ['6 people found this helpful']  ['4.0 out of 5 stars']   \n",
       "\n",
       "                                               title  \n",
       "0  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great for th...  \n",
       "1  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Great system...  \n",
       "2  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to fit ...  \n",
       "3  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Installation...  \n",
       "4  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Looks the pa...  "
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "167"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(reviewData)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### Merge the review data with Sentiment generated\n",
    "\n",
    "# add column Polarity Score\n",
    "reviewData['polarityScore'] = allReviewsSentimentDF['body']\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "      <th>polarityScore</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>['Rob A.']</td>\n",
       "      <td>replaced old 80 slide timer clock gravity fed ...</td>\n",
       "      <td>['on 16 January 2018']</td>\n",
       "      <td>['115 people found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['Really useful, and sort of cool!']</td>\n",
       "      <td>0.347917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>['S. Newton']</td>\n",
       "      <td>combi water tank unit arrived within couple da...</td>\n",
       "      <td>['on 26 December 2017']</td>\n",
       "      <td>['3 people found this helpful']</td>\n",
       "      <td>['4.0 out of 5 stars']</td>\n",
       "      <td>['Works fine with Alexa. Dubious about claimed...</td>\n",
       "      <td>0.247222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>['JC']</td>\n",
       "      <td>bough together multizone phoned came installed...</td>\n",
       "      <td>['on 15 July 2017']</td>\n",
       "      <td>['3 people found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['Have been perfectly happy with the installat...</td>\n",
       "      <td>0.266667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>['Fionn']</td>\n",
       "      <td>delighted setup bought without ireland also bo...</td>\n",
       "      <td>['on 6 February 2017']</td>\n",
       "      <td>['One person found this helpful']</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['Slick Simple Functionality']</td>\n",
       "      <td>0.235771</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>['Nemesis']</td>\n",
       "      <td>previously purchased drayton wiser wireless tr...</td>\n",
       "      <td>['on 5 March 2018']</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['Simple, elegant and just works.']</td>\n",
       "      <td>0.064484</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          author                                               body  \\\n",
       "0     ['Rob A.']  replaced old 80 slide timer clock gravity fed ...   \n",
       "1  ['S. Newton']  combi water tank unit arrived within couple da...   \n",
       "2         ['JC']  bough together multizone phoned came installed...   \n",
       "3      ['Fionn']  delighted setup bought without ireland also bo...   \n",
       "4    ['Nemesis']  previously purchased drayton wiser wireless tr...   \n",
       "\n",
       "                      date                            helpful  \\\n",
       "0   ['on 16 January 2018']  ['115 people found this helpful']   \n",
       "1  ['on 26 December 2017']    ['3 people found this helpful']   \n",
       "2      ['on 15 July 2017']    ['3 people found this helpful']   \n",
       "3   ['on 6 February 2017']  ['One person found this helpful']   \n",
       "4      ['on 5 March 2018']                                 []   \n",
       "\n",
       "                   rating                                              title  \\\n",
       "0  ['5.0 out of 5 stars']               ['Really useful, and sort of cool!']   \n",
       "1  ['4.0 out of 5 stars']  ['Works fine with Alexa. Dubious about claimed...   \n",
       "2  ['5.0 out of 5 stars']  ['Have been perfectly happy with the installat...   \n",
       "3  ['5.0 out of 5 stars']                     ['Slick Simple Functionality']   \n",
       "4  ['5.0 out of 5 stars']                ['Simple, elegant and just works.']   \n",
       "\n",
       "   polarityScore  \n",
       "0       0.347917  \n",
       "1       0.247222  \n",
       "2       0.266667  \n",
       "3       0.235771  \n",
       "4       0.064484  "
      ]
     },
     "execution_count": 210,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reviewData.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "negativeReviews = reviewData[reviewData.polarityScore < 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author</th>\n",
       "      <th>body</th>\n",
       "      <th>date</th>\n",
       "      <th>helpful</th>\n",
       "      <th>rating</th>\n",
       "      <th>title</th>\n",
       "      <th>polarityScore</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>[]</td>\n",
       "      <td>product proved pretty unreliable keep disconne...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 23 March 2...</td>\n",
       "      <td>['2 people found this helpful']</td>\n",
       "      <td>['2.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Thermostat k...</td>\n",
       "      <td>-0.173611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>struggled different expensive system since new...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 25 May 2021']</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Simples', '\\...</td>\n",
       "      <td>-0.015383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>[]</td>\n",
       "      <td>warning unit buggy firstly look blog unable co...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 17 January...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['1.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Unfinished s...</td>\n",
       "      <td>-0.131250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[]</td>\n",
       "      <td>cant without plumber wired dont thing hard min...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 6 June 2020']</td>\n",
       "      <td>[]</td>\n",
       "      <td>['5.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to use ...</td>\n",
       "      <td>-0.291667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>[]</td>\n",
       "      <td>simply could pair begin didnt spot old program...</td>\n",
       "      <td>['Reviewed in the United Kingdom on 6 January ...</td>\n",
       "      <td>[]</td>\n",
       "      <td>['3.0 out of 5 stars']</td>\n",
       "      <td>['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Struggled to...</td>\n",
       "      <td>-0.116667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  author                                               body  \\\n",
       "8     []  product proved pretty unreliable keep disconne...   \n",
       "3     []  struggled different expensive system since new...   \n",
       "7     []  warning unit buggy firstly look blog unable co...   \n",
       "3     []  cant without plumber wired dont thing hard min...   \n",
       "5     []  simply could pair begin didnt spot old program...   \n",
       "\n",
       "                                                date  \\\n",
       "8  ['Reviewed in the United Kingdom on 23 March 2...   \n",
       "3  ['Reviewed in the United Kingdom on 25 May 2021']   \n",
       "7  ['Reviewed in the United Kingdom on 17 January...   \n",
       "3  ['Reviewed in the United Kingdom on 6 June 2020']   \n",
       "5  ['Reviewed in the United Kingdom on 6 January ...   \n",
       "\n",
       "                           helpful                  rating  \\\n",
       "8  ['2 people found this helpful']  ['2.0 out of 5 stars']   \n",
       "3                               []  ['5.0 out of 5 stars']   \n",
       "7                               []  ['1.0 out of 5 stars']   \n",
       "3                               []  ['5.0 out of 5 stars']   \n",
       "5                               []  ['3.0 out of 5 stars']   \n",
       "\n",
       "                                               title  polarityScore  \n",
       "8  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Thermostat k...      -0.173611  \n",
       "3  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Simples', '\\...      -0.015383  \n",
       "7  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Unfinished s...      -0.131250  \n",
       "3  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Easy to use ...      -0.291667  \n",
       "5  ['\\n\\n\\n\\n\\n\\n\\n\\n  \\n  \\n    ', 'Struggled to...      -0.116667  "
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "negativeReviews.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
